library(tidyverse)

# Load the saved workspace; it is expected to define `data`, which every
# pipeline below reads.
load(here::here('data/data.RData'))

# Build data_OG: one row per (record, affiliation) pair.
# OG holds ";"-separated affiliations, so it is split into one row per entry,
# trimmed, stripped of missing entries and harmonised.
# distinct() deliberately runs last: trimming and recoding can turn different
# raw strings (" UFRGS" vs "UFRGS", "Puc Minas" vs "PUC/MG") into the same
# value, and de-duplicating before that point would leave those repeats in.
data_OG <- data %>%
  separate_rows(OG, sep = ";") %>%
  mutate(OG = str_trim(OG)) %>%
  # Drop missing affiliations: a real NA or the literal text "NA".
  filter(!is.na(OG), OG != "NA") %>%
  # Map alternative spellings onto the label used in the rest of the analysis.
  mutate(
    OG = case_when(
      OG == "Puc Minas" ~ "PUC/MG",
      OG == "PUC/Brasília" ~ "UCB",
      TRUE ~ OG
    )
  ) %>%
  distinct()

# Print how many data_OG rows each affiliation (OG) has in the journal
# "Estudos Internacionais", largest count first.
data_OG %>%
  filter(
    SO == "Estudos Internacionais: Revista de Relações Internacionais da PUC Minas"
  ) %>%
  count(OG) %>%
  # Re-apply the per-affiliation grouping so the printed tibble is still
  # grouped by OG.
  group_by(OG) %>%
  arrange(-n)


# Number of rows of `data` per journal (SO); used later as the denominator
# `n_articles`. A row is kept only when none of its ";"-separated affiliations
# is missing.
# "NA" is matched as a whole affiliation entry (optionally padded with
# spaces). A plain substring search for "NA" would also throw away rows whose
# affiliation merely contains those letters, e.g. "UNAM".
# NOTE(review): rows mixing a known and a missing affiliation are excluded
# here, while data_OG keeps their known affiliation - confirm this is intended.
n_artices_per_SO_notNA <- data %>%
  filter(!is.na(OG), !str_detect(OG, "(^|;)\\s*NA\\s*(;|$)")) %>%
  count(SO) %>%
  rename(n_articles = n)

n_artices_per_SO_notNA

# Number of distinct affiliations (OG) per journal (SO).
# data_OG has one affiliation per row, so missing entries are excluded with an
# exact comparison. A substring search for "NA" would also drop every
# institution whose name contains those letters (e.g. "UNAM") and undercount.
n_distinct_OG <- data_OG %>%
  filter(!is.na(OG), OG != "NA") %>%
  distinct(SO, OG) %>%
  count(SO) %>%
  rename(n_distinct_og = n)


# Per-journal summary table (one row per SO):
#   primary_OG                  - affiliation with the most rows in the journal
#   n_primary_OG                - number of rows for that affiliation
#   n_articles                  - per-journal count from n_artices_per_SO_notNA
#   prop_primary_OG_per_article - n_primary_OG / n_articles, 2 decimals
#   n_distinct_og               - per-journal count from n_distinct_OG
#   institutional_diversity     - n_distinct_og / n_articles, 2 decimals
# Both joins name their key explicitly so they cannot silently pick up any
# other column the tables might come to share, and the result is returned
# ungrouped.
dat8_2a <- data_OG %>%
  distinct() %>%
  count(OG, SO) %>%
  rename(n_primary_OG = n) %>%
  # Largest count first; ties keep the alphabetical OG order produced by
  # count(), so slice(1) picks the same row on every run.
  arrange(desc(n_primary_OG)) %>%
  group_by(SO) %>%
  slice(1) %>%
  ungroup() %>%
  left_join(n_artices_per_SO_notNA, by = "SO") %>%
  mutate(
    prop_primary_OG_per_article = round(n_primary_OG / n_articles, 2)
  ) %>%
  arrange(desc(prop_primary_OG_per_article)) %>%
  select(
    SO,
    primary_OG = OG,
    n_primary_OG,
    n_articles,
    prop_primary_OG_per_article
  ) %>%
  left_join(n_distinct_OG, by = "SO") %>%
  mutate(institutional_diversity = round(n_distinct_og / n_articles, 2))

dat8_2a
##############################################################################

# Same per-journal row count as n_artices_per_SO_notNA, restricted to
# "Meridiano 47 - Journal of Global Studies" with PY > 2015.
# "NA" is matched as a whole ";"-separated affiliation entry. A plain
# substring search would also discard rows whose affiliation merely contains
# the letters "NA", e.g. "UNAM".
# NOTE(review): rows mixing a known and a missing affiliation are excluded
# here, while data_OG keeps their known affiliation - confirm this is intended.
n_artices_per_SO_notNA_meridiano <- data %>%
  filter(SO == "Meridiano 47 - Journal of Global Studies", PY > 2015) %>%
  filter(!is.na(OG), !str_detect(OG, "(^|;)\\s*NA\\s*(;|$)")) %>%
  count(SO) %>%
  rename(n_articles = n)

# Number of distinct affiliations (OG) for
# "Meridiano 47 - Journal of Global Studies", restricted to PY > 2015.
# data_OG has one affiliation per row, so missing entries are excluded with an
# exact comparison. A substring search for "NA" would also drop every
# institution whose name contains those letters (e.g. "UNAM") and undercount.
n_distinct_OG_meridiano <- data_OG %>%
  filter(!is.na(OG), OG != "NA") %>%
  filter(SO == "Meridiano 47 - Journal of Global Studies", PY > 2015) %>%
  distinct(SO, OG) %>%
  count(SO) %>%
  rename(n_distinct_og = n)

# Summary row for "Meridiano 47 - Journal of Global Studies" restricted to
# PY > 2015, with the same columns as dat8_2a:
#   primary_OG                  - affiliation with the most rows in the subset
#   n_primary_OG                - number of rows for that affiliation
#   n_articles                  - from n_artices_per_SO_notNA_meridiano
#   prop_primary_OG_per_article - n_primary_OG / n_articles, 2 decimals
#   n_distinct_og               - from n_distinct_OG_meridiano
#   institutional_diversity     - n_distinct_og / n_articles, 2 decimals
# Both joins name their key explicitly so they cannot silently pick up any
# other column the tables might come to share, and the result is returned
# ungrouped.
dat8_2a_meridiano <- data_OG %>%
  distinct() %>%
  filter(SO == "Meridiano 47 - Journal of Global Studies", PY > 2015) %>%
  count(OG, SO) %>%
  rename(n_primary_OG = n) %>%
  # Largest count first; ties keep the alphabetical OG order produced by
  # count(), so slice(1) picks the same row on every run.
  arrange(desc(n_primary_OG)) %>%
  group_by(SO) %>%
  slice(1) %>%
  ungroup() %>%
  left_join(n_artices_per_SO_notNA_meridiano, by = "SO") %>%
  mutate(
    prop_primary_OG_per_article = round(n_primary_OG / n_articles, 2)
  ) %>%
  arrange(desc(prop_primary_OG_per_article)) %>%
  select(
    SO,
    primary_OG = OG,
    n_primary_OG,
    n_articles,
    prop_primary_OG_per_article
  ) %>%
  left_join(n_distinct_OG_meridiano, by = "SO") %>%
  mutate(institutional_diversity = round(n_distinct_og / n_articles, 2))

# Final comparison table: the restricted Meridiano 47 row stacked on top of
# the all-journal table, sorted by the share of the primary institution.
# The restricted row is relabelled before binding; the Meridiano 47 row that
# still carries the original title afterwards is the one from dat8_2a and is
# labelled "(full)".
# Both inputs are ungrouped first: they may still be grouped by SO, and SO is
# the very column rewritten by the mutate() calls below.
# NOTE: for Estudos Internacionais this table reports UFRGS as primary_OG;
# in the article PUC/MG was considered instead because it is the institution
# of origin.
dat8_2a_meridiano %>%
  ungroup() %>%
  mutate(
    SO = if_else(
      SO == "Meridiano 47 - Journal of Global Studies",
      "Meridiano 47 (2016-2021)",
      SO
    )
  ) %>%
  bind_rows(ungroup(dat8_2a)) %>%
  mutate(
    SO = if_else(
      SO == "Meridiano 47 - Journal of Global Studies",
      "Meridiano 47 (full)",
      SO
    )
  ) %>%
  select(SO, primary_OG, prop_primary_OG_per_article) %>%
  arrange(desc(prop_primary_OG_per_article))
